#World Bank as an Enforcer of Human Rights
#This file reproduces all of the figures in the main manuscript
#Compilation of final script: Kaitlyn King (KTK)
#Original code by members of the SPEC Lab: Miriam Barnum, Benjamin Graham, Kaitlyn King, 
# Sarah Lu, Amanda Scott, Madeline Zheng, & Kelebogile Zvobgo, 
#Compilation File Created: 01.15.2020
#Updated: 03.29.2020

#Clear 
rm(list=ls())

#setwd
#setwd("/Users/kaitlynking/Downloads")
setwd("/Volumes/GoogleDrive/My Drive/IFIs_and_HR/Data/Replication materials")

#load packages
library(tidyr)
library(dplyr)
library(readstata13)
library(ggplot2)
library(MASS)
library(foreign)
library(plotly)
library(gridExtra)
library(grid)
library(choroplethr)
library(choroplethrMaps)

#load the data
dat <- read.dta("2_WBHR_Data_zg2020.dta")

###################################################################################
# The following code reproduces Figure 1 (Geographical Spread of Inspection Panel and CAO Cases) in the main text. #
###################################################################################

allcountries <- sapply(dat$country, tolower) #get the column "country" from the data, transform to lowercase

#edit country names to match with choroplethr data
allcountries <- replace(allcountries, allcountries=="congo, democratic republic of the", "democratic republic of the congo")
allcountries <- replace(allcountries, allcountries=="kyrgyz republic", "kyrgyzstan")
allcountries <- replace(allcountries, allcountries=="yemen, republic of", "yemen")
allcountries <- replace(allcountries, allcountries=="tanzania", "united republic of tanzania")
allcountries <- replace(allcountries, allcountries=="serbia", "republic of serbia")

countries <- factor(allcountries) #split into factors

countrycount <- table(countries) #turn into a table with countries + their counts

df <- as.data.frame(countrycount) #turn table into dataframe

df <- rename(df, region = countries, value = Freq)

df["value"] <- log(df["value"]) + 1 #log the numbers

country_choropleth(df, title = "", legend = "cases", num_colors = 1) +
  scale_fill_continuous(low="#e6e6e6", high="#262626", na.value="#ffffff")


###################################################################################
# The following code reproduces Figure 2 (Projects Triggering Inspection Panel and CAO Complaints) in the main text. #
###################################################################################


counts <- c(0)

#loop through 9 times (one for each project type)
for(i in 1:9){
  ip_temp <- 0
  cao_temp <- 0
  
  #loop through all rows in dataframe
  for(j in 1:nrow(dat)){
    
    if(dat[j, "cao"] == 0 && dat[j, "project_type"] == i){
      ip_temp = ip_temp + 1
    }
    else if(dat[j, "cao"] == 1 && dat[j, "project_type"] == i){
      cao_temp = cao_temp + 1
    }
  }
  
  counts <- append(counts, ip_temp)  
  counts <- append(counts, cao_temp)
  
}

#get rid of the placeholder
counts <- counts[2:19]


##### CREATE DATA FRAME #############
institution1 <- c("Inspection Panel", "Compliance Advisor/Ombudsman")
institution2 <- rep(institution1, 9)

projecttype <- c("1", "1", "2", "2", "3", "3", "4", "4", "5", "5", "6", "6", "7", "7", "8", "8", "9", "9")

df <- data.frame(
  Institution = factor(institution2, 
                       levels = c("Inspection Panel", "Compliance Advisor/Ombudsman")),
  
  Project = factor(projecttype,
                   levels = c("1","2","3","4","5","6","7","8","9")),
  
  Num_cases = counts
)


############################### MAKE GRAPH ###############################

p= ggplot(data = df, aes(x = Project, y = Num_cases, fill = Institution))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75)+
  
  #change y axis to go from 0 - 45, brings the y axis down to 0
  scale_y_continuous(limits = c(0,80), expand = c(0, 0))+
  
  #modify legend, change bar colors
  scale_fill_manual(values=c("#000000", "#b2b2b2"), 
                    name="", #legend title
                    breaks=c("Inspection Panel", "Compliance Advisor/Ombudsman"))+
  
  scale_x_discrete(breaks = c("1", "2", "3", "4", "5", "6", "7", "8", "9"),
                   labels = c("Infrastructure", 
                              "Mining/Resource Extraction", "Land Management/Administration",
                              "Environmental Sustainability", "Agriculture and Forestry", 
                              "Manufacturing and Services",
                              "Other (General Poverty Reduction)", 
                              "Government Capacity Building", "Private Sector Capacity Building"))+
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=14),
        legend.position = c(0.85, 0.9),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=45))+ #no x axis title
  
  #x and y axis title
  ylab("Number of Cases")+
  #xlab("Issue")+
  
  #plot title
  labs(title = "")

p



###################################################################################
# The following code reproduces Figure 3 (Primary Issues Raised) in the main text.#
###################################################################################


for(x in c(1:nrow(dat))){
  if(dat[x, 'issue_primary'] == -11){
    is.na(dat[x, 'issue_primary']) = NA
  }
}

sumIP = sum(dat$cao == 0) - sum(is.na(dat$issue_primary))
sumCAO = sum(dat$cao == 1) 

IPvector = c()
for(x in c(1:9)){
  counter = 0
  variable = 0
  for(n in c(1:nrow(dat))){
    if(!is.na(dat[n, 'issue_primary']) && dat[n, 'issue_primary'] == x && dat[n, 'cao'] == 0){
      counter = counter + 1
    }
  }
  variable = counter/sumIP
  IPvector = c(IPvector, variable)
}

CAOvector = c()
for(x in c(1:9)){
  counter = 0
  variable = 0
  for(n in c(1:nrow(dat))){
    if(!is.na(dat[n, 'issue_primary']) && dat[n, 'issue_primary'] == x && dat[n, 'cao'] == 1){
      counter = counter + 1
    }
  }
  variable = counter/sumCAO
  CAOvector = c(CAOvector, variable)
}

df = rbind(IPvector, CAOvector)
df = as.data.frame(df)
colnames(df) = c(1:9)


graphLabels = c("1"="Involuntary resettlement","2"="Environmental damage","3"="Economic damage",
                "4"="Labor rights violations","5"="Physical/cultural heritage rights violations",
                "6"="Physical integrity rights violations","7"="Failure to communicate",
                "8"="Corruption","9"="Misalignment/Project failure")


dat.g <-gather(df, type, value )
dat.g$indicator = c("IP", "CAO")


ggplot(dat.g,aes(type, value)) +
  geom_bar(aes(fill = indicator), stat= "identity", position=position_dodge(), width = .7) +
  scale_fill_grey(start=.1, end=.75, name="",
                  labels=c("Inspection Panel", "Compliance Advisor/Ombudsman"))+         
  scale_y_continuous(name = "Percentage of Cases", labels=scales::percent, expand = c(0, 0), limits=c(0, 1)) +
  scale_x_discrete(name=" ",
                   labels=graphLabels) +
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=18),
        axis.text.x = element_text(hjust=1, angle=45),
        legend.position = c(0.75, 0.75))


###################################################################################
# The following code reproduces Figure 4 (Success at the Inspection Panel and CAO) in the main text. #
###################################################################################

attach(dat) #makes the variables in the .dta file usable in R (can directly use variables like 'cao')

#use table(row, col) function to get counts of the outcomes for cao and non-cao(IP)
mytable1=table(harm_acknowledged,cao) 
mytable2=table(bank_noncompliance,cao)
mytable3=table(project_halted,cao)
mytable4=table(project_change,cao)
mytable5=table(compensation,cao)
mytable6=table(perpetrators_punished,cao)

#get proportions (by column), store in new matrices, convert to percentage
m1 = prop.table(mytable1, 2) * 100
m2 = prop.table(mytable2, 2) * 100
m3 = prop.table(mytable3, 2) * 100
m4 = prop.table(mytable4, 2) * 100
m5 = prop.table(mytable5, 2) * 100

#last column- any favorable outcome
all_cao = 0
all_ip = 0

cao_total = 0
ip_total = 0

for(i in 1:nrow(dat)){
  if(dat[i, "cao"] == 1){
    cao_total = cao_total + 1
    if((!is.na(dat[i, "harm_acknowledged"]) && !is.na(dat[i, "bank_noncompliance"]) && 
        !is.na(dat[i, "project_halted"]) && !is.na(dat[i, "project_change"]) && !is.na(dat[i, "compensation"])) 
       &&
       (dat[i, "harm_acknowledged"]== 1 || dat[i, "bank_noncompliance"] == 1 || dat[i, "project_halted"] == 1 || 
        dat[i, "project_change"] == 1 || dat[i, "compensation"] == 1)){
      all_cao = all_cao + 1
    }
  }
}

for(i in 1:nrow(dat)){
  if(dat[i, "cao"] == 0){
    ip_total = ip_total + 1
    if((!is.na(dat[i, "harm_acknowledged"]) && !is.na(dat[i, "bank_noncompliance"]) && 
        !is.na(dat[i, "project_halted"]) && !is.na(dat[i, "project_change"]) && !is.na(dat[i, "compensation"])) 
       &&
       (dat[i, "harm_acknowledged"]== 1 || dat[i, "bank_noncompliance"] == 1 || dat[i, "project_halted"] == 1 || 
        dat[i, "project_change"] == 1 || dat[i, "compensation"] == 1)){
      all_ip = all_ip + 1
    }
  }
}



all_cao = all_cao*100/cao_total
all_ip = all_ip*100/ip_total


#create data frame with cao, outcomes, and percentage vectors
data2 =data.frame(
  cao = factor(c("IP", "CAO", "IP", "CAO", "IP", "CAO", "IP", "CAO", "IP", "CAO", "IP", "CAO"), 
               levels = c("IP", "CAO")),
  
  Outcomes = factor(c("Harm Acknowledged", "Harm Acknowledged",
                      "Bank Noncompliance", "Bank Noncompliance",
                      "Project Halted", "Project Halted",
                      "Project Change", "Project Change",
                      "Compensation", "Compensation",
                      "Any Favorable Outcome", "Any Favorable Outcome"), 
                    levels = c("Harm Acknowledged", 
                               "Bank Noncompliance", 
                               "Project Halted", 
                               "Project Change",
                               "Compensation",
                               "Any Favorable Outcome")),
  
  Percentage = c(m1[2,], m2[2,], m3[2,], m4[2,], m5[2,], all_ip, all_cao)
)


#make bar chart
p= ggplot(data = data2, aes(x = Outcomes, y = Percentage, fill = cao))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75)+
  
  #change y axis to go from 0 - 100, brings the y axis down to 0
  scale_y_continuous(limits = c(0,100), expand = c(0, 0))+
  
  #modify legend, change bar colors
  scale_fill_manual(values=c("#181818", "#BFBFBF"), 
                    name="", #legend title
                    breaks=c("IP", "CAO"),
                    labels=c("Inspection Panel", "Compliance Advisor/Ombudsman"))+
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=18),
        legend.position = c(0.75, 0.85),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=45))+ #no x axis title
  
  #y axis title
  ylab("Percentage of Cases")

p


###################################################################################
# The following code reproduces Figure 5 (Outcomes by Primary Issues Raised) in the main text. #
###################################################################################

########## IP bar plot ##########

ip_dat <- dat %>% 
  filter(cao == 0 & issue_primary < 7) %>%
  select(issue_primary, no_positive, harm_acknowledged, compensation, project_change) %>%
  group_by(issue_primary) %>% dplyr::summarise_all(mean) %>%
  pivot_longer(cols = -issue_primary, names_to = "outcome_type", values_to = "perc_cases")

ip_dat$perc_cases <- ip_dat$perc_cases * 100
ip_dat$outcome_type <- factor(ip_dat$outcome_type, levels = ip_dat$outcome_type[1:4])

p <- ggplot(data = ip_dat, aes(x = as.factor(issue_primary), y = perc_cases, fill = outcome_type))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75) +
  
  #change y axis to go from 0 - 100, brings the y axis down to 0
  scale_y_continuous(limits = c(0,100), expand = c(0, 0)) +
  
  #modify legend, change bar colors
  scale_fill_manual(values = c( "#b2b2b2", "#777777", "#444444", "#000000"), 
                    name = "", #legend title
                    labels = c("No Positive Outcome", "Harm Acknowledged", "Project Change", "Compensation"))+
  
  scale_x_discrete(breaks = c("1", "2", "3", "4", "6"),
                   name = "",
                   labels = c("Involuntary Resettlement", 
                              "Environmental Damage", "Economic Damage",
                              "Labor Rights Violations", "Physical Integrity Rights"),
                   limits=c("1","2","3","4","6"))+
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=14),
        legend.position = c(0.25, 0.9),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=50)) + #no x axis title
  
  #x and y axis title
  ylab("Percentage of Cases")+
  #xlab("Issue")+
  
  #plot title
  labs(title = "Inspection Panel")

########## CAO bar plot ##########

cao_dat <- dat %>% 
  filter(cao == 1 & issue_primary < 7) %>%
  select(issue_primary, no_positive, harm_acknowledged, compensation, project_change) %>%
  group_by(issue_primary) %>% dplyr::summarise_all(mean, na.rm = TRUE) %>%
  pivot_longer(cols = -issue_primary, names_to = "outcome_type", values_to = "perc_cases")

cao_dat$perc_cases <- cao_dat$perc_cases * 100
cao_dat$outcome_type <- factor(cao_dat$outcome_type, levels = cao_dat$outcome_type[c(1,2,4,3)])

q <- ggplot(data = cao_dat, aes(x = as.factor(issue_primary), y = perc_cases, fill = outcome_type))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75) +
  
  #change y axis to go from 0 - 100, brings the y axis down to 0
  scale_y_continuous(limits = c(0,100), expand = c(0, 0)) +
  
  #modify legend, change bar colors
  scale_fill_manual(values = c( "#b2b2b2", "#777777", "#444444", "#000000"), 
                    name = "", #legend title
                    labels = c("No Positive Outcome", "Harm Acknowledged", "Project Change", "Compensation"))+
  
  scale_x_discrete(breaks = c("1", "2", "3", "4", "6"),
                   name = "",
                   labels = c("Involuntary Resettlement", 
                              "Environmental Damage", "Economic Damage",
                              "Labor Rights Violations", "Physical Integrity Rights"),
                   limits=c("1","2","3","4","6"))+
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=14),
        legend.position = c(0.25, 0.9),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=50))+ #no x axis title
  
  #x and y axis title
  ylab("Percentage of Cases")+
  #xlab("Issue")+
  
  #plot title
  labs(title = "Compliance Advisor/Ombudsman")

# Multiple plot function from: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,                                      layout.pos.col = matchidx$col))
    }
  }
}

figure5 <- multiplot(p,q, cols=2)

png("figure5_March2020.png", res = 300, width = 3000, height = 2000)
figure5 <- multiplot(p,q, cols=2)
dev.off()

pdf("figure5_March2020.pdf")
figure5 <- multiplot(p,q, cols=2)
dev.off()

###################################################################################
# The following code reproduces Figure 6 (NGO Involvement and Complaint Fundamentals) in the main text. #
###################################################################################

#######get percentages for complaint variable

#count function turns frequency table into a data frame
df1 <- count(dat, c('complaint', 'foreign_ngo', 'domestic_ngo')) 

complaint_freq <- df1[6, "freq"] #gets frequency of complaint for no ngos
complaint_total <- 0

#get total count for raw_data = 1
for(i in 6:9){
  complaint_total = complaint_total + df1[i, "freq"]
}

no_ngo_complaint_pct <- 100*complaint_freq/complaint_total
ngo_complaint_pct <- 100 - no_ngo_complaint_pct


#####get percentages for raw data variable

#count function turns frequency table into a data frame
df2 <- count(dat, c('raw_data', 'foreign_ngo', 'domestic_ngo')) 

raw_freq <- df2[5, "freq"] #gets frequency of raw_data for no ngos
raw_total <- 0

#get total count for raw_data = 1
for(i in 5:8){
  raw_total = raw_total + df2[i, "freq"]
}

no_ngo_raw_pct <- 100*raw_freq/raw_total
ngo_raw_pct <- 100 - no_ngo_raw_pct


######get percentages for testimony data variable

#count function turns frequency table into a data frame
df3 <- count(dat, c('personal_testimony', 'foreign_ngo', 'domestic_ngo')) 

testimony_freq <- df3[5, "freq"] #gets frequency of testimony for no ngos
testimony_total <- 0

#get total count for raw_data = 1
for(i in 5:7){
  testimony_total = testimony_total + df3[i, "freq"]
}

no_ngo_test_pct <- 100*testimony_freq/testimony_total
ngo_test_pct <- 100 - no_ngo_test_pct


#######get percentages for bank policy variable

#count function turns frequency table into a data frame
df4 <- count(dat, c('bank_policy', 'foreign_ngo', 'domestic_ngo')) 

policy_freq <- df4[5, "freq"] #gets frequency of raw_data for no ngos
policy_total <- 0

#get total count for bank policy = 1
for(i in 5:8){
  policy_total = policy_total + df4[i, "freq"]
}

no_ngo_policy_pct <- 100*policy_freq/policy_total
ngo_policy_pct <- 100 - no_ngo_policy_pct


####get percentages for policy violated variable

#count function turns frequency table into a data frame
df5 <- count(dat, c('bank_policy_viol', 'foreign_ngo', 'domestic_ngo')) 

viol_freq <- df5[5, "freq"] #gets frequency of raw_data for no ngos
viol_total <- 0

#get total count for bank policy violated = 1
for(i in 5:8){
  viol_total = viol_total + df5[i, "freq"]
}

no_ngo_viol_pct <- 100*viol_freq/viol_total
ngo_viol_pct <- 100 - no_ngo_viol_pct



#####CREATE DATA FRAME

ngotype1 <- c("NGO Involvement", "No NGO Involvement")
ngotype2 <- rep(ngotype1, 5)

finalDF =data.frame(
  ngotype = factor(ngotype2, 
                   levels = c("NGO Involvement", "No NGO Involvement")),
  
  complaint_quality = factor(c("Complaint", "Complaint",
                               "Raw Data", "Raw Data",
                               "Personal Testimony", "Personal Testimony",
                               "Bank Policy Citation", "Bank Policy Citation",
                               "Bank Policy Violation", "Bank Policy Violation"), 
                             levels = c("Complaint", 
                                        "Raw Data", 
                                        "Personal Testimony", 
                                        "Bank Policy Citation",
                                        "Bank Policy Violation")),
  
  Percentage = c(ngo_complaint_pct, no_ngo_complaint_pct, ngo_raw_pct, no_ngo_raw_pct, 
                 ngo_test_pct, no_ngo_test_pct, ngo_policy_pct, no_ngo_policy_pct, 
                 ngo_viol_pct, no_ngo_viol_pct)
)

p= ggplot(data = finalDF, aes(x = complaint_quality, y = Percentage, fill = ngotype))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75)+
  
  #change y axis to go from 0 - 100, brings the y axis down to 0
  scale_y_continuous(limits = c(0,75), expand = c(0, 0))+
  
  #modify legend, change bar colors
  scale_fill_manual(values=c("#181818", "#BFBFBF"),
                    name="")+ #legend title
  #breaks=c("IP", "CAO"),
  #labels=c("Inspection Panel", "Compliance Advisor/Ombudsman"))+
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=18),
        legend.position = c(0.9, 0.95),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=45))+ #no x axis title
  
  #y axis title
  ylab("Percentage of Cases")

p

######## figure w/o complaint variable

DF_withoutcomplaint <- finalDF[c(3:10),] #remove first 2 rows of old df by subsetting

q= ggplot(data = DF_withoutcomplaint, aes(x = complaint_quality, y = Percentage, fill = ngotype))+
  geom_bar(stat = "identity", position = position_dodge(), width = 0.75)+
  
  #change y axis to go from 0 - 100, brings the y axis down to 0
  scale_y_continuous(limits = c(0,75), expand = c(0, 0))+
  
  #modify legend, change bar colors
  scale_fill_manual(values=c("#181818", "#BFBFBF"),
                    name="")+ #legend title
  
  theme_classic() + theme(panel.grid=element_blank(), panel.border=element_blank())+
  theme(text = element_text(size=18),
        legend.position = c(0.9, 0.95),
        axis.title.x = element_blank(),
        axis.text.x = element_text(hjust=1, angle=45))+ #no x axis title
  
  #y axis title
  ylab("Percentage of Cases")

q
